*******************************************************
******** PSID: Merge Parents and Children 			***
******** 	   Add Transition into Adulthood data 	***
*******************************************************

* Start with an empty dataset in memory.
clear

* Folder locations referenced by the rest of this do-file.
global PSID "F:\Uni\Daten\Household Panels\PSID\Cross-Year Individual"
global FAM "F:\Uni\Daten\Household Panels\PSID\Family Files"
global TA "F:\Uni\Daten\Household Panels\PSID\Transition into Adulthood Supplement"
global OUT "F:\Uni\Forschung\Projekte\Soziale Ungleichheit\Daten\Household Panels\PSID\Data"

// Note: the do-files delivered with the data (used to read in the raw data)
// must be placed in the same folders as the data they read.


*** Transition into Adulthood

* For each TA wave: read the raw data with the do-file that comes with the
* data, build the wave-specific person identifier pid20XX and save the wave
* as TA20XX.dta inside its own folder.

foreach w in 05 07 09 11 13 15 17 {
	* start every wave from an empty dataset (as the family-file loops do), so
	* reading a wave never depends on what the previous iteration left in memory
	clear
	cd "$TA\TA20`w'"
	do  TA20`w'.do
	
	* a 0 in either id component is turned into missing, so the id is missing too
	mvdecode TA`w'0003 TA`w'0004, mv(0)
	* store the id as long: the default float type represents integers exactly
	* only up to 16,777,216, which leaves little headroom for ids of this form
	gen long pid20`w' = (TA`w'0003 * 1000) + TA`w'0004

	save TA20`w'.dta, replace
}

**** Core Data
* Reads the cross-year individual file, builds person and parent identifiers
* plus the wave-specific identifiers used to merge the TA waves, and saves the
* result as psid.dta in $OUT.
clear
do "$PSID\IND2017ER.do"

*set wd
cd "$OUT"

*create identifiers for individuals and parents

*code missings: a 0 in any id component becomes missing, so every identifier
*built from it below is missing as well
mvdecode ER30001 ER30002 ER32009 ER32010 ER32016 ER32017 	/// Individual core data
		ER33801 ER33802 ER33901 ER33902 ER34001 ER34002 ER34101 	/// TA data
		ER34102 ER34201 ER34202 ER34301 ER34302 ER34501 ER34502, mv(0)

*pids
* All ids are stored as long: the default float type represents integers
* exactly only up to 16,777,216, too close to the range of these ids to be safe.
* NOTE(review): mpid/fpid are presumably the mother's and father's ids -
* confirm against the PSID codebook.
gen long pid = (ER30001 * 1000) + ER30002
gen long mpid = (ER32009 * 1000) + ER32010
gen long fpid = (ER32016 * 1000) + ER32017


*create wave-specific identifiers to merge core data and TA

gen long pid2005 = (ER33801 * 1000) + ER33802
gen long pid2007 = (ER33901 * 1000) + ER33902
gen long pid2009 = (ER34001 * 1000) + ER34002
gen long pid2011 = (ER34101 * 1000) + ER34102
gen long pid2013 = (ER34201 * 1000) + ER34202
gen long pid2015 = (ER34301 * 1000) + ER34302
gen long pid2017 = (ER34501 * 1000) + ER34502

save psid.dta, replace       



*** merge PSID core and TA 

* -use- accepts -clear-, not -replace-; with -replace- the command aborts
use psid, clear

* Merge every TA wave on its wave-specific person id. -update replace- lets
* values from the TA file overwrite differing values of variables that exist
* in both datasets; merge_<year> records the merge result of each wave.
* NOTE(review): observations found only in a TA file are kept and have a
* missing pid - check merge_<year> before relying on pid being unique.
foreach yr of numlist 2005(2)2017 {
	merge m:1 pid`yr' using "$TA\TA`yr'\TA`yr'.dta", gen(merge_`yr') update replace
}



*rename the yearly variables used as family identifiers to famid<year>

//1968-1997: one variable per year
local yr = 1968
foreach idvar of varlist ER30001 ER30020 ER30043 ER30067 ER30091 ER30117 ER30138 ///
	ER30160 ER30188 ER30217 ER30246 ER30283 ER30313 ER30343 ER30373 ER30399 ///
	ER30429 ER30463 ER30498 ER30535 ER30570 ER30606 ER30642 ER30689 ER30733 ///
	ER30806 ER33101 ER33201 ER33301 ER33401 {
	rename `idvar' famid`yr'
	local ++yr
}

//1999-2017: one variable every second year
local yr = 1999
foreach idvar of varlist ER33501 ER33601 ER33701 ER33801 ER33901 ER34001 ///
		ER34101 ER34201 ER34301 ER34501 {
	rename `idvar' famid`yr'
	local yr = `yr' + 2
}

* store the merged core + TA file and remove the intermediate core file
save psid_ta.dta, replace
erase psid.dta


**** merge PSID core and TA with family data

*recode yearly family files
clear 

// until 1993
* For each year: read the family file, copy its second variable to famid<year>
* and whichever of the listed income variables is present to faminc<year>,
* then save only those two variables as FAM<year>_short.dta.
forvalues w = 68/93 {
	clear
	cd "$FAM\fam19`w'\"
	do "FAM19`w'.do"

	* name of the second variable in the file
	quietly ds
	local intid : word 2 of `r(varlist)'
	di `intid'
	clonevar famid19`w' = `intid'
	
	* isvar is a user-written command; r(varlist) is taken to hold those of the
	* listed names that exist in the current file
	isvar V81 V529 V1514 V2226 V2852 V3256 V3676 V4154 V5029 V5626 ///
			V6173 V6766 V7412 V8065 V8689 V9375 V11022 V12371 V13623 V14670 ///
			V16144 V17533 V18875 V20175 V21481 V23322 
	local incvar "`r(varlist)'"
	capture clonevar faminc19`w' = `incvar'
		
	keep famid19`w' faminc19`w'
	save FAM19`w'_short.dta, replace
}	


//1994-1997
* Same extraction as for the earlier years, but folders and do-files carry
* the "er" / "ER" suffix.
forvalues w = 94/97 {
	clear
	cd "$FAM\fam19`w'er\"
	do "FAM19`w'ER.do"

	* name of the second variable in the file
	quietly ds
	local intid : word 2 of `r(varlist)'
	di `intid'
	clonevar famid19`w' = `intid'
	
	* pick whichever of the listed income variables exists in this file
	isvar ER4153 ER6993 ER9244 ER12079
	local incvar "`r(varlist)'"
	capture clonevar faminc19`w' = `incvar'
		
	keep famid19`w' faminc19`w'
	save FAM19`w'_short.dta, replace
}	

//1999
* Read the 1999 family file and reduce it to the family id and family income.
clear
cd "$FAM\fam1999er\"
do "FAM1999ER.do"

* keep the two source variables and give them their panel names
keep ER13002 ER16462
rename ER13002 famid1999
rename ER16462 faminc1999

save FAM1999_short.dta, replace


//2001-2017
* For each biennial wave: read the family file and save famid, faminc and
* hhsize for that year as FAM<year>_short.dta.
foreach w in 01 03 05 07 09 11 13 15 17 {
	clear
	cd "$FAM\fam20`w'er\"
	do "FAM20`w'ER.do"

	* name of the second variable in the file
	quietly ds
	local intid : word 2 of `r(varlist)'
	di `intid'
	clonevar famid20`w' = `intid'
	
	* family income: whichever of the listed variables exists in this file
	isvar ER20456 ER24099 ER28037 ER41027 ER46935 ER52343 ER58152 ER65349 ER71426 
	local incvar "`r(varlist)'"
	capture clonevar faminc20`w' = `incvar'
	
	* household size: whichever of the listed variables exists in this file
	isvar ER17012 ER21016 ER25016 ER36016 ER42016 ER47316 ER53016 ER60016 ER66016
	local sizevar "`r(varlist)'"
	capture clonevar hhsize20`w' = `sizevar'
	
	keep famid20`w' faminc20`w' hhsize20`w'
	save FAM20`w'_short.dta, replace
}	


*merge data 
* Attach the yearly family extracts to the individual file, one year at a
* time, keeping all individuals (master) and dropping unmatched family rows.
use "$OUT\psid_ta.dta", clear

foreach yr of numlist 1968/1997 1999(2)2017 {
	* folders carry the suffix "er" from 1994 onwards
	if `yr' >= 1994 {
		local sfx er
	}
	else {
		local sfx
	}
	merge m:1 famid`yr' using "$FAM\fam`yr'`sfx'\FAM`yr'_short.dta", gen(merge_fam`yr') keep(master match)
}

save "$OUT\psid_ta_fam.dta", replace
erase "$OUT\psid_ta.dta"

************************
*** Recode variables ***
************************

* political participation
*fre TA050028 TA070028 TA090029 TA110030 TA130029 TA150029 TA170024

* The item number of the voting question differs between waves, so wave and
* item suffix are paired by position. The waves are listed in the order in
* which the variables are to be created.
local vote_waves 05   07   09   13   15   11   17
local vote_items 0028 0028 0029 0029 0029 0030 0024

forvalues k = 1/7 {
	local w : word `k' of `vote_waves'
	local item : word `k' of `vote_items'
	recode TA`w'`item' (1=1 "yes")(5=0 "no")(8=.a "DK")(9=.b "NA/refused") ///
						(0=.c "too young"), gen(vote20`w')
	lab var vote20`w' "Voted in 20`w'"
}



*** socio-demographics

*employment status
* The same recoding applies to every year, so the rules are defined once.
local emprules (1=1 "working now")(2=2 "Only temporarily laid off") (3=3 "Looking for work, unemployed") ///
	(4=4 "Retired")(5=5 "Permanently disabled")(6=6 "HouseWife; keeping house") ///
	(7=7 "Student")(8=8 "Other")(9=.a "NA; DK")(0=.b "Inap.")

* 1981-1997: one variable per year
local yr = 1981
foreach src of varlist ER30353 ER30382 ER30411 ER30441 ER30474 ER30509 ER30545  ///
				ER30580 ER30616 ER30653 ER30699 ER30744 ER30816 ER33111 ER33211  ///
				ER33311 ER33411  {
	recode `src' `emprules', gen(empstat`yr')
	lab var empstat`yr' "Employment status `yr'"
	local ++yr
}

* 1999-2017: one variable every second year
local yr = 1999
foreach src of varlist ER33512 ER33612 ER33712 ER33813 ER33913 ER34016  ///
				ER34116 ER34216 ER34317 ER34516 {
	recode `src' `emprules', gen(empstat`yr')
	lab var empstat`yr' "Employment status `yr'"
	local yr = `yr' + 2
}

*Age
* 999 and 0 are turned into the extended missing values .a and .b; all other
* values are copied unchanged into age<year>.

* 1968-1997: one variable per year
local yr = 1968
foreach src of varlist ER30004 ER30023 ER30046 ER30070 ER30094 ER30120 ER30141  ///
		ER30163 ER30191 ER30220 ER30249 ER30286 ER30316 ER30346 ER30376 ER30402  ///
		ER30432 ER30466 ER30501 ER30538 ER30573 ER30609 ER30645 ER30692 ER30736  ///
		ER30809 ER33104 ER33204 ER33304 ER33404 {
	recode `src' (999=.a "NA; DK")(0=.b "Born or moved in after the YEAR interview"), gen(age`yr')
	lab var age`yr' "Age `yr'"
	local ++yr
}

* 1999-2017: one variable every second year
local yr = 1999
foreach src of varlist ER33504 ER33604 ER33704 ER33804  ///
		ER33904 ER34004 ER34104 ER34204 ER34305 ER34504 {
	recode `src' (999=.a "NA; DK")(0=.b "Born or moved in after the YEAR interview"), gen(age`yr')
	lab var age`yr' "Age `yr'"
	local yr = `yr' + 2
}

*Sex
* sex: 0 = male, 1 = female, .a = DK; female is an exact copy of sex
recode ER32000 (1=0 "male")(2=1 "female")(9=.a "DK"), generate(sex)
label variable sex "Sex"
clonevar female = sex

*Education
* 99 and 0 are turned into the extended missing values .a and .b; all other
* values are copied unchanged into edu<year>.

* 1972 and 1975 are the only years handled before 1981
recode ER30110 (99=.a "NA; DK; refused")(0=.b "Inap."), gen(edu1972)
recode ER30181 (99=.a "NA; DK; refused")(0=.b "Inap."), gen(edu1975)
foreach yr in 1972 1975 {
	lab var edu`yr' "Years completed education `yr'"
}

* 1981-1997: one variable per year
local yr = 1981
foreach src of varlist ER30356 ER30384 ER30413 ER30443 ER30478 ER30513 ER30549  ///
		ER30584 ER30620 ER30657 ER30703 ER30748 ER30820 ER33115 ER33215 ER33315 ER33415 {
	recode `src' (99=.a "NA; DK; refused")(0=.b "Inap."), gen(edu`yr')
	lab var edu`yr' "Years completed education `yr'"
	local ++yr
}

* 1999-2017: one variable every second year
local yr = 1999
foreach src of varlist ER33516 ER33616 ER33716 ER33817 ER33917 ER34020 ///
		ER34119 ER34230 ER34349 ER34548 {
	recode `src' (99=.a "NA; DK; refused")(0=.b "Inap."), gen(edu`yr')
	lab var edu`yr' "Years completed education `yr'"
	local yr = `yr' + 2
}


*Family Income
* faminc* is the sum of:
*	Taxable Income of Head and Wife
*	Total Transfers of Head and Wife
*	Taxable Income of Others
*	Transfer Income of Others

* label every available year: annual up to 1997, every second year afterwards
foreach yr of numlist 1968/1997 1999(2)2017 {
	label variable faminc`yr' "Total Family Income `yr'"
}

*Household size
foreach yr of numlist 2001(2)2017 {
	label variable hhsize`yr' "Household Size `yr'"
}



*Survey Year
* syear<year> is 0 when the listed source variable is 0 and <year> otherwise
* (including when the source variable is missing).

* 1968-1997: one variable per year
local yr = 1968
foreach src of varlist ER30004 ER30023 ER30046 ER30070 ER30094 ER30120 ER30141  ///
		ER30163 ER30191 ER30220 ER30249 ER30286 ER30316 ER30346 ER30376 ER30402  ///
		ER30432 ER30466 ER30501 ER30538 ER30573 ER30609 ER30645 ER30692 ER30736  ///
		ER30809 ER33104 ER33204 ER33304 ER33404 {
	gen syear`yr' = cond(`src' == 0, 0, `yr')
	lab var syear`yr' "Survey Year `yr'"
	local ++yr
}

* 1999-2017: one variable every second year
local yr = 1999
foreach src of varlist ER33504 ER33604 ER33704 ER33804  ///
		ER33904 ER34004 ER34104 ER34204 ER34305 ER34504 {
	gen syear`yr' = cond(`src' == 0, 0, `yr')
	lab var syear`yr' "Survey Year `yr'"
	local yr = `yr' + 2
}

*** Weights
*cross-sectional

*longitudinal


*TAS
* Copy the weight variable of each TA wave into weight_ta<year>.
gen weight_ta2005 = TA050955      
gen weight_ta2007 = TA070937
gen weight_ta2009 = TA091001
gen weight_ta2011 = TA111143
gen weight_ta2013 = TA131234
gen weight_ta2015 = TA151294
gen weight_ta2017 = TA171987 / 388 	// change range from [388;87475] to [1;225.451]

* Label each weight with its own year (previously every label said 2005).
foreach yr of numlist 2005(2)2017 {
	lab var weight_ta`yr' "Weight `yr': TAS"
}






********************
*** reshape long ***
********************

* Variables carried into the wide file. lfsat* and mhealth* are not created
* anywhere in this do-file, and a wildcard that matches no variable makes
* -keep- abort. They are therefore added only when they exist.
local keepvars pid* famid* mpid fpid vote* empstat* age* sex edu* faminc* hhsize* weight* syear*
foreach stub in lfsat mhealth {
	capture unab extra : `stub'*
	if _rc == 0 {
		local keepvars `keepvars' `extra'
	}
}
keep `keepvars'

save "$OUT\psid_ta_fam_wide.dta", replace
erase "$OUT\psid_ta_fam.dta"


*reshape long
* the wave-specific ids pid2005-pid2017 were only needed for merging
drop pid2*
* NOTE(review): syear* is kept above but not listed as a stub here, so the
* syear<year> variables stay in wide form in the long file - confirm intended.
reshape long famid@ faminc@ hhsize@ ///																			
			vote@  ///
			empstat@ age@ edu@ weight_ta@ , ///													
		i(pid) j(year) atwl ()

		
		
***recode data
* drop most data pre-TA (years before 2000) and rows with negative family income
drop if year<2000 | faminc<0


*** recode variables
* numeric codes that are to be treated as missing
mvdecode faminc, mv(9999999)
mvdecode edu, mv(98)
mvdecode vote age sex empstat edu, mv(-10/-1)

*income
* three income measures: total, per household member, and divided by the
* square root of household size
gen hhinc = faminc
gen hhinc_pp = faminc/hhsize
gen hhinc_pp_sqrt = faminc/(sqrt(hhsize))

* deciles of each measure, computed over all observations in memory
foreach inc in hhinc hhinc_pp hhinc_pp_sqrt {
	local dec = subinstr("`inc'", "hhinc", "hhinc_dec", 1)
	xtile `dec' = `inc', nq(10) 
}

* rescale the deciles from 1-10 to 0-10
foreach dec in hhinc_dec hhinc_dec_pp hhinc_dec_pp_sqrt {
	gen `dec'10 = (`dec'-1)*(10/9)
}


*political variables 
fre vote

*socio-demographics
clonevar female = sex
fre empstat

* labour-market dummies derived from employment status
recode empstat (2 3=1 "unemployed")(1 4/8=0 "other")(else=.), generate(unemployed)
recode empstat (4/8=1 "not in labor force")(1/3=0 "other")(else=.), generate(nlf)

* education dummies: low (1-11), medium (12), high (13-17)
recode edu (1/11=1 "low")(12/17=0 "other"), generate(edu_low)
recode edu (12=1 "medium")(1/11 13/17=0 "other"), generate(edu_med)
recode edu (13/17=1 "high")(1/12=0 "other"), generate(edu_high)

* age-group dummies: 1 from age 13 up to the upper bound, 0 above it up to 120
foreach upper in 25 35 {
	local above = `upper' + 1
	recode age (13/`upper'=1)(`above'/120=0)(else=.), generate(age18`upper')
}
* age centred at 18
gen age18 = age-18

*create interaction variables
* each income decile measure times age centred at 18
foreach dec in hhinc_dec hhinc_dec_pp hhinc_dec_pp_sqrt {
	gen `dec'Xage = `dec' * age18
}

* total-income decile times the two age-group dummies
foreach grp in 1825 1835 {
	gen hhinc_decXage`grp' = hhinc_dec * age`grp'
}

* final long-format file
save "$OUT\psid_ta_fam_long.dta", replace

